{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 中文分词\n",
    "- 中文分词算法：https://blog.csdn.net/Yellow_python/article/details/83304059\n",
    "- jieba基础：https://blog.csdn.net/Yellow_python/article/details/80559586"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# jieba中文分词HMM示例\n",
    "- finalseg\n",
    "- posseg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## finalseg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba\n",
    "def _print(words): print('  '.join(words))\n",
    "text = '柳梦璃入梦C法'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳  梦  璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "_print(jieba.cut(text, HMM=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳梦璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "_print(jieba.cut(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳梦璃入  梦  C法\n"
     ]
    }
   ],
   "source": [
    "jieba.finalseg.emit_P['B']['C'] = -1e-9  # begin\n",
    "_print(list(jieba.finalseg.__cut(text)))  # 不带词库分词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳  梦璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "jieba.finalseg.emit_P['M']['梦'] = -100  # middle\n",
    "_print(jieba.cut(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳  梦  璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "jieba.finalseg.emit_P['S']['梦'] = -.1  # single\n",
    "_print(jieba.cut(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳梦  璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "jieba.finalseg.emit_P['E']['梦'] = -.01  # end\n",
    "_print(jieba.cut(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "柳  梦  璃  入梦  C  法\n"
     ]
    }
   ],
   "source": [
    "jieba.del_word('柳梦')  # Force_Split_Words\n",
    "_print(jieba.cut(text))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## posseg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
